% Zheng, Huang, and Kale: checkpoint-based fault tolerance for AMPI / Charm++.
% The Note value is carried over unchanged from the original export; it is
% presumably a digital-library record id -- TODO confirm, or replace with a
% proper doi field.
@article{CP:AMPI,
   Author  = {Zheng, Gengbin and Huang, Chao and Kal{\'e}, Laxmikant V.},
   Title   = {Performance evaluation of automatic checkpoint-based fault tolerance for {AMPI} and {Charm++}},
   Journal = {SIGOPS Oper. Syst. Rev.},
   Volume  = {40},
   Number  = {2},
   Pages   = {90--99},
   Note    = {1131340},
   Year    = {2006},
}



% Jiang and Manivannan: optimistic checkpointing with selective message logging.
@inproceedings{CP:Optimistic,
   Author    = {Jiang, Q. and Manivannan, D.},
   Title     = {An optimistic checkpointing and selective message logging approach for consistent global checkpoint collection in distributed systems},
   BookTitle = {{IEEE} International Parallel and Distributed Processing Symposium ({IPDPS} 2007)},
   Pages     = {1--10},
   Year      = {2007},
}



% Naksinehaboon et al.: reliability-aware incremental checkpoint/restart model.
@inproceedings{CP:Incremental,
   Author    = {Naksinehaboon, N. and Liu, Yudan and Leangsuksun, C. and Nassar, R. and Paun, M. and Scott, S. L.},
   Title     = {Reliability-Aware Approach: An Incremental Checkpoint/Restart Model in {HPC} Environments},
   BookTitle = {8th {IEEE} International Symposium on Cluster Computing and the Grid ({CCGRID} '08)},
   Pages     = {783--788},
   Year      = {2008},
}



% Oldfield et al.: checkpoint overhead model for next-generation systems.
% The title keeps its outer brace pair so its capitalisation is never recased
% by the bibliography style.
@inproceedings{CP:Next,
   Author    = {Oldfield, Ron A. and Arunagiri, Sarala and Teller, Patricia J. and Seelam, Seetharami and Varela, Maria Ruiz and Riesen, Rolf and Roth, Philip C.},
   Title     = {{Modeling the Impact of Checkpoints on Next-Generation Systems}},
   BookTitle = {24th {IEEE} Conference on Mass Storage Systems and Technologies ({MSST} 2007)},
   Pages     = {30--46},
   Year      = {2007},
}



% Plank and Li: checkpointing with N+1 parity.
@inproceedings{CP:Parity,
   Author    = {Plank, J. S. and Li, Kai},
   Title     = {Faster checkpointing with {N+1} parity},
   BookTitle = {Twenty-Fourth International Symposium on Fault-Tolerant Computing ({FTCS-24}), Digest of Papers},
   Pages     = {288--297},
   Year      = {1994},
}



% Silva and Silva: experimental study of diskless checkpointing.
% The volume number lives only in the Volume field; Pages holds the bare range.
@inproceedings{CP:Diskless,
   Author    = {Silva, L. M. and Silva, J. G.},
   Title     = {{An Experimental Study about Diskless Checkpointing}},
   BookTitle = {Proceedings of the 24th {Euromicro} Conference},
   Volume    = {1},
   Pages     = {395--402},
   Year      = {1998},
}



% Chiueh and Deng: checkpoint mechanisms on massively parallel machines.
@inproceedings{CP:MPP,
   Author    = {Chiueh, Tzi-cker and Deng, Peitao},
   Title     = {Evaluation of checkpoint mechanisms for massively parallel machines},
   BookTitle = {Proceedings of the Annual Symposium on Fault Tolerant Computing},
   Pages     = {370--379},
   Year      = {1996},
}

% Sobe: stable checkpointing without shared disks.
% Only a page count is recorded for this paper, not a page range, so it is
% kept in Note; a Pages field would be prefixed with "page"/"pp." by the style.
@inproceedings{CP:Local,
   Author    = {Sobe, P.},
   Title     = {{Stable Checkpointing in Distributed Systems without Shared Disks}},
   BookTitle = {Proceedings of the International Parallel and Distributed Processing Symposium},
   Note      = {8 pp.},
   Year      = {2003},
}

% LANL operational data release, referenced by URL only (no date recorded).
% The author is double-braced so the institution name is treated as one unit.
@misc{CP:LANL,
   Author = {{Los Alamos National Laboratory}},
   Title  = {{Operational Data to Support and Enable Computer Science Research}},
   Note   = {\url{http://institutes.lanl.gov/data/fdata/}}
}

% Daly: higher-order estimate of the optimum checkpoint interval.
% The Note value is carried over unchanged from the original export; it is
% presumably a digital-library record id -- TODO confirm, or replace with a
% proper doi field.
@article{CP:Daly,
   Author  = {Daly, J. T.},
   Title   = {{A Higher Order Estimate of the Optimum Checkpoint Interval for Restart Dumps}},
   Journal = {Future Gener. Comput. Syst.},
   Volume  = {22},
   Number  = {3},
   Pages   = {303--312},
   Note    = {1134248},
   Year    = {2006},
}

% NASA Parallel Benchmarks, referenced by URL only (no date recorded).
% The author is double-braced so the organisation name is treated as one unit.
@misc{CP:NPB,
   Author = {{NASA}},
   Title  = {{NASA Parallel Benchmarks}},
   Note   = {\url{http://www.nas.nasa.gov/Resources/Software/npb.html}}
}

% Roadrunner system overview slides, referenced by URL only (no date recorded).
% Spaces in the URL path are percent-encoded: a URL cannot contain literal
% spaces, so the unencoded form does not typeset as a usable address.
@misc{CP:roadrunner,
   Author = {{Los Alamos National Lab}},
   Title  = {{RR Seminar - System Overview}},
   Note   = {\url{http://www.lanl.gov/orgs/hpc/roadrunner/pdfs/Koch%20-%20Roadrunner%20Overview/RRSeminar%20-%20System%20Overview.pdf}}
}
